
/*

SUMMARY: 

THIS FILE CREATES AND TRIMS HOURLY WAGES FOR CHILDCARE WORKERS AND PRESCHOOL AND 
KINDERGARTEN TEACHERS AGED 18+ FOR THE YEARS 1992-2019. THE SAMPLE IS EXTRACTED FROM 
THE CPS OUTGOING ROTATION GROUP. ANYONE WHO HAS NEITHER AN HOURLY WAGE NOR A 
WEEKLY EARNINGS OBSERVATION IS DROPPED FROM THE SAMPLE. 


DETAILS:


-Reads in the Stata data set "CPS_E5.dta". 
-Restricts the sample to years 1992-2019 and to workers who are 18+, have an observation for either weekly earnings or an hourly wage, and are either a childcare worker (occ2010==4600) or a preschool or kindergarten teacher (occ2010==2300) under the 2010 occupation classification. 
-Sets to missing all observations coded N.I.U. or not known, creates occupation-specific dummies, and creates and adjusts sample weights with usual hours worked from the full sample.
-Creates an hourly wage measure that includes weekly earnings converted into an hourly rate using usual hours worked per week, in addition to directly reported hourly wages. 
-Trims the hourly wages by the top and bottom year-specific percentiles summarizing the measures both pre- and post-trimming. 
-Saves the cleaned data set as "CPS_v4.dta".


*/

*------------------------------------------------------------------------------*

capture log close                    // close any log that is still open; ignore the error if none is
cls                                  // clear the Results window
clear                                // empty the data in memory before loading


* Load the input extract from the directory stored in the global macro $temp
use "$temp\CPS_E5.dta"

***********************
* SAMPLE RESTRICTIONS *
***********************

* Keep respondents aged 18+ observed in 1992-2019 whose paidhour code is not 0
* (code 0 is recoded to missing further down in this file) and whose 2010-basis
* occupation is 4600 (childcare workers) or 2300 (preschool and kindergarten
* teachers).
* The upper year bound matches the 1992-2019 range documented in the header and
* hard-coded in the trimming loops below; without it, any later year present in
* the extract would be saved with untrimmed wages.
* Note: inrange() also excludes observations with a missing year.
keep if age >= 18 & inrange(year, 1992, 2019) & paidhour != 0 & inlist(occ2010, 2300, 4600)

* Drop records flagged asecflag == 1 in March
* NOTE(review): presumably the ASEC supplement records -- confirm against the extract codebook
drop if asecflag == 1 & month == 3

**********************************
* SETTING MISSING VARIABLE CODES *
**********************************

* Recode sentinel values to Stata missing (.).
* The integer sentinels are compared exactly. The two decimal sentinels are
* compared at float precision on both sides: 9999.99 and 999.99 have no exact
* binary representation, so a plain "== 9999.99" silently matches nothing when
* the variable is stored as float. float(x) == float(literal) gives the same
* result as the plain comparison when the variable is stored as double, and
* also works when it is stored as float.
replace uhrswork1   = . if inlist(uhrswork1, 997, 999)
replace uhrsworkorg = . if inlist(uhrsworkorg, 998, 999)
replace earnweek    = . if float(earnweek) == float(9999.99)
replace hourwage    = . if float(hourwage) == float(999.99)
replace paidhour    = . if paidhour == 0

************************
* CREATING OCC DUMMIES *
************************

/*

NOTE: 

CC      = 1 if occ2010 == 4600 (Childcare workers)
CCPSKT  = 1 if occ2010 == 2300 (Preschool and Kindergarten Teachers)
               or occ2010 == 4600 (Childcare workers)

*/

gen CC      = occ2010 == 4600
gen CCPSKT  = occ2010 == 2300 | occ2010 == 4600

label var CC "Childcare Workers"
label var CCPSKT "Preschool, Kindergarten Teachers, and Childcare Workers"

* Value-label definitions. vocc1 has no matching dummy in this file; it is kept
* only so the set of labels saved with the data set is unchanged.
label define vocc1 		0 "Not PS & KT" 1 "PS & KT"
label define vocc2 		0 "Not Childcare" 1 "Childcare Worker"
label define vocc12 	0 "Not PS, KT, or CW" 1 "PS, KT, and Childcare Workers"

* Attach the labels that are actually defined above. The previous loop attached
* v`var' (vCC / vCCPSKT), names that are never defined, so the dummies ended up
* without any working value labels.
label values CC     vocc2
label values CCPSKT vocc12

* One-way frequency table for each dummy
foreach var of varlist CC CCPSKT {

tab `var'

}

***********************************************************
* Checking EARNWEEK AND USUAL HOURS WORKED (UNRESTRICTED) *
***********************************************************

* For each occupation dummy, print detailed summaries of weekly earnings and
* usual hours worked among observations with paidhour == 1 and fewer than 100
* usual hours (missing hours are excluded, since missing is not < 100).
foreach grp of varlist CC CCPSKT {

    * Common sample condition shared by both summaries
    local insample "`grp' == 1 & paidhour == 1 & uhrswork1 < 100"

    display as text "EARNWEEK FOR `grp'"
    summarize earnweek if `insample', detail

    display as text "USUAL HOURS WORKED FOR `grp'"
    summarize uhrswork1 if `insample', detail

}

********************************************
* CREATING SAMPLE/ADJUSTING SAMPLE WEIGHTS *
********************************************

* Need to divide weights by 12 since each weight per month represents whole
* population (as per CPS Technical notes)
gen double earnwta = earnwt/12

* Hours-adjusted weight: adjusted earnings weight times usual weekly hours,
* defined only where usual hours are non-missing and below 100.
* Stored as double so the product keeps the precision of earnwta (the default
* float storage would round it to about 7 significant digits).
gen double uhearnwt = earnwta*uhrswork1 if uhrswork1 < 100

*************************
* CREATING HOURLY WAGES *
*************************

* Implied hourly wage for those who could only report weekly earnings
* (paidhour == 1): weekly earnings divided by usual weekly hours, defined only
* where usual hours are below 100.
generate double hw = earnweek / uhrswork1 if paidhour == 1 & uhrswork1 < 100

* Combined measure: the reported hourly wage where paidhour == 2, otherwise the
* implied wage hw (which is already missing unless paidhour == 1).
generate double hwage = cond(paidhour == 2, hourwage, hw)
format %5.2f hwage

*************************
* TRIMMING HOURLY WAGES *
*************************

* Everything between "log using" and "log close" is written to the trimming log.
log using "$logfiles\wagetrim_cps.log", replace

* YEAR SPECIFIC HOURLY WAGES PRE-TRIMMING *
forvalues y = 1992(1)2019 {

    di as text "PRE-TRIMMED HOURLY WAGE IN YEAR: `y'"
    summarize hwage if year == `y', detail

}

* TRIMMING STEP *
* For each wage variable and year, set to missing any value below the 1st or
* above the 99th (unweighted) percentile of that variable within the year.
* The percentiles are computed on `hw' itself (previously hard-coded to hwage),
* so any variable added to the list is trimmed on its own distribution.
* The cutoffs are kept as named scalars rp1_<var>_<year> / rp99_<var>_<year> and
* read back through scalar(), so a variable with the same name (or abbreviation)
* cannot be picked up instead.
foreach hw of varlist hwage {
    forvalues y = 1992(1)2019 {

        quietly summarize `hw' if year == `y', detail
        scalar rp1_`hw'_`y'  = r(p1)
        scalar rp99_`hw'_`y' = r(p99)

        replace `hw' = . if (`hw' < scalar(rp1_`hw'_`y') | `hw' > scalar(rp99_`hw'_`y')) & year == `y'

    }
}

* YEAR-SPECIFIC HOURLY WAGES POST-TRIMMING *
forvalues y = 1992(1)2019 {

    di as text "POST-TRIMMED HOURLY WAGE IN YEAR: `y'"
    summarize hwage if year == `y', detail

}

log close

*****************
* MISCELLANEOUS *
*****************

* Keep a copy of the state FIPS code as stfip, then rename the original to "state"
generate stfip = statefip
rename statefip state

* String identifier formed by concatenating year and state
egen state_year = concat(year state)

* Move the main identifiers, weights, and wage variables to the front of the data set
order year stfip state cpsid earnwt paidhour hourwage hwage ///
      earnweek uhrsworkorg occ2010

*******************
* SAVING THE DATA *
*******************

* Write the cleaned data set, overwriting any existing copy
save "$temp\CPS_v4.dta", replace


